"""
统计Haploview的执行进度, 并清理不必要的文件
"""

import os

# Root directory whose immediate sub-directories are scanned below.
DIR = "/public/home/yunlzhang/data/pan-genome/A188/haploview"

# Running total of sub-directories that contain a haploview.TAGS file.
count = 0

# Full paths of every non-hidden sub-directory of DIR (plain files and
# dot-prefixed entries are skipped).
sv_dirs = [
    path
    for path in (
        f"{DIR}/{name}" for name in os.listdir(DIR) if not name.startswith(".")
    )
    if os.path.isdir(path)
]


def clear_files(sv_dir):
    """Delete the no-longer-needed intermediate files from ``sv_dir``.

    Only the fixed set of file names listed below is touched; every other
    entry in the directory is left alone.  Names that are not present are
    skipped silently, so the function can be called repeatedly on the same
    directory.

    Args:
        sv_dir: Path of the directory to clean.

    Raises:
        OSError: If a listed entry exists but cannot be removed (for example
            it is a directory, or permissions are insufficient).
    """
    need_clear_files = (
        "haploview.TESTS",
        "SV.id.vcf.gz",
        "SV.plink.log",
        "SV.plink.map",
        "SV.plink.nosex",
        "SV.plink.ped",
        "SV.vcf.gz",
    )
    for filename in need_clear_files:
        # Remove first and tolerate absence instead of checking existence
        # beforehand: this avoids a race with other processes deleting the
        # same file, and also cleans up dangling symlinks, which
        # os.path.exists() reports as missing.
        try:
            os.remove(f"{sv_dir}/{filename}")
        except (FileNotFoundError, NotADirectoryError):
            # Nothing to delete (missing file, or sv_dir is not a directory).
            pass


# A directory is counted as done when it contains haploview.TAGS
# (presumably written once the Haploview run has finished -- confirm);
# only those directories have their intermediate files removed.
for dir_path in sv_dirs:
    if os.path.exists(f"{dir_path}/haploview.TAGS"):
        count += 1
        clear_files(dir_path)

# Report: total directories, completed directories, completed fraction.
# Guard the division so an empty scan prints 0.0 instead of raising
# ZeroDivisionError.
total = len(sv_dirs)
print(total, count, round(count / total, 3) if total else 0.0)
